Set up

library(data.table)
library(tidyverse)
library(Seurat)
library(patchwork)
library(reactable)
# set your working directory
setwd("~/Documents/HGEN_663/extra/lec8")

Integrating datasets

We’ll start with pre-computed Seurat objects.

Merging

# import MacCarroll dataset
Seurat_object_MacCarroll <- readRDS("Seurat_object_MacCarroll.rds")
Seurat_object_MacCarroll[["Dataset"]] <- "MacCarroll"

# import Joyal dataset
Seurat_object_Joyal <- readRDS("Seurat_object_Joyal.rds")
Seurat_object_Joyal[["Dataset"]] <- "Joyal"

# merge the 2 dataset
Seurat_object <- merge(Seurat_object_MacCarroll,
                       y = Seurat_object_Joyal,
                       add.cell.ids = NULL,
                       project = "Lab_integration")
# before proceeding, we note markers for cell cycle
Seurat_object <- CellCycleScoring(Seurat_object, set.ident = FALSE,
                                  s.features = cc.genes$s.genes,
                                  g2m.features = cc.genes$g2m.genes)
# we then go on to apply a transformation to the data and regresses out unwanted variation
vs <- c("nFeature_RNA", "percent.mito", "S.Score", "G2M.Score")
Seurat_object.list <- SplitObject(Seurat_object, split.by = "Dataset") %>%
  lapply(SCTransform, verbose = F, vars.to.regress = vs)

Integration

# select most consistently variable features
Seurat_object.features <- SelectIntegrationFeatures(object.list = Seurat_object.list,
                                                    nfeatures = 3000)

# some data wrangling
Seurat_object.list <- PrepSCTIntegration(object.list = Seurat_object.list,
                                         anchor.features = Seurat_object.features)

# identify anchors shared by the datasets
Seurat_object.anchors <- FindIntegrationAnchors(object.list = Seurat_object.list,
                                                normalization.method = "SCT", 
                                                anchor.features = Seurat_object.features)

# proceed with integration
Seurat_object <- IntegrateData(anchorset = Seurat_object.anchors,
                               normalization.method = "SCT")

Dimension reduction

Seurat_object <- RunPCA(Seurat_object) %>%
  RunUMAP(dims = 1:20) %>%
  RunTSNE(dims = 1:20)

Clustering

Seurat_object <- FindNeighbors(Seurat_object, 
                               dims = 1:2, 
                               reduction = "umap")
Seurat_object <- FindClusters(Seurat_object, 
                              resolution = 0.5, 
                              reduction = "umap")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8393
## Number of edges: 182804
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9519
## Number of communities: 30
## Elapsed time: 0 seconds

UMAP

Together

DimPlot(Seurat_object, reduction = "umap", label=TRUE)

Split

DimPlot(Seurat_object, reduction = "umap", label=TRUE, split.by="Dataset")

tSNE

Together

DimPlot(Seurat_object, reduction = "tsne", label=TRUE)

Split

DimPlot(Seurat_object, reduction = "tsne", label=TRUE, split.by="Dataset")


Identifying cell types

We could again identify markers genes as before

DefaultAssay(Seurat_object) <- "integrated"
Seurat_object.markers <- FindAllMarkers(Seurat_object, only.pos = TRUE,
                                        min.pct = 0.25, logfc.threshold = 0.25)
head(Seurat_object.markers) %>% reactable()

top10 <- Seurat_object.markers %>% 
  group_by(cluster) %>% 
  slice_max(avg_log2FC, n = 10) %>%
  ungroup()
DotPlot(
  Seurat_object,
  assay = NULL,
  unique(top10$gene),
  cols = c("blue", "red"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  group.by = NULL,
  split.by = NULL,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))


Or again adopt known markers

markers.retina.dotplot <- toupper(c(
  "Rho",  #Rods
  "Opn1mw", #Cones
  "Trpm1", #Bipolar cells
  "Scgn", #Bipolar cells
  "Celf4", #Amacrine cells
  "Rgs5", #Pericytes
  "Cldn5", #Endothelial cells
  "Lyz2", #Immune cells
  "Glul", # Muller glia
  "Gfap", #Astrocytes
  "Optc", #Lens cells
  "Lhx1"  #Horizontal cells
))
DotPlot(
  Seurat_object,
  assay = NULL,
  markers.retina.dotplot,
  cols = c("blue", "red"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  group.by = NULL,
  split.by = NULL,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))


Or make use of pre-existing labels annotated independently

DimPlot(Seurat_object, reduction = "umap", label = TRUE, 
        pt.size = 0.5, group.by="Cell_Type") + NoLegend()


Annotation

Activity 3: Assign as many cluster labels as you can based on the provided information

Seurat_object <- SetIdent(Seurat_object, cells = NULL, value="seurat_clusters")
new.cluster.ids <- c("",     # cluster 0
                     "",     # cluster 1
                     "",     # cluster 2
                     "",     # cluster 3
                     "",     # cluster 4
                     "",     # cluster 5
                     "",     # cluster 6
                     "",     # cluster 7
                     "",     # cluster 8
                     "",     # cluster 9
                     "",     # cluster 10
                     "",     # cluster 11
                     "",     # cluster 12
                     "",     # cluster 13
                     "",     # cluster 14
                     "",     # cluster 15
                     "",     # cluster 16
                     "",     # cluster 17
                     "",     # cluster 18
                     "",     # cluster 19
                     "",     # cluster 20
                     "",     # cluster 21
                     "",     # cluster 22
                     "",     # cluster 23
                     "",     # cluster 24
                     "",     # cluster 25
                     "",     # cluster 26
                     "",     # cluster 27
                     ""      # cluster 28
)
names(new.cluster.ids) <- levels(Seurat_object)
Seurat_object <- RenameIdents(Seurat_object, new.cluster.ids)

Seurat_object[["Cell_Type"]] <- Idents(object = Seurat_object)

DimPlot(Seurat_object, reduction = "umap", label = T)